# Ask the user to pick the mortality CSV and load it into a data frame.
Mortalitydata <- read.csv(file = file.choose())
# Show the structure (column names and types) of what was loaded.
str(object = Mortalitydata)
#Q1
# Echo the whole table to the console.
data.frame(Mortalitydata)
# Crude mortality rate: deaths divided by ETR, element by element.
#
# CRUDE: a data frame (or list) holding numeric columns DEATHS and ETR
#        (ETR is presumably the exposed-to-risk -- confirm against the data).
# Returns a numeric vector with one rate per row of CRUDE.
crude_rate <- function(CRUDE) {
  stopifnot(all(c("DEATHS", "ETR") %in% names(CRUDE)))
  # Work on the argument, not on the global Mortalitydata, so the function
  # gives the right answer for whatever table it is handed.
  CRUDE[["DEATHS"]] / CRUDE[["ETR"]]
}

# Crude rate for every row of the mortality table.
Crude_rates <- crude_rate(CRUDE = Mortalitydata)
View(Mortalitydata)

# Print the fourth column of the table.
Mortalitydata[[4]]
# Store the crude rates alongside the source data as column CRUDE.
Mortalitydata[["CRUDE"]] <- Crude_rates
View(Mortalitydata)

#Q2
##Graduated Rates
###Gompertz: rate = B * C^x, so log(rate) = log(B) + x * log(C) is linear in x
Gompertz <- lm(formula = log(Mortalitydata$CRUDE) ~ Mortalitydata$AGE)
Gompertz

coef(Gompertz)

# The intercept estimates log(B) and the slope estimates log(C);
# exponentiate each to recover the Gompertz parameters.
B <- exp(coef(Gompertz)[[1]])
C <- exp(coef(Gompertz)[[2]])
B
C
#Graduated rates implied by the fitted parameters
Mortalitydata[["GRADUATED"]] <- B * C^Mortalitydata$AGE
head(Mortalitydata)
#Q3
##Calculating Third Differences
# Use base::diff(differences = 3) instead of redefining diff(), which would
# mask the base function for the rest of the session.
# Results are scaled by 10^6 and rounded to whole numbers for readability.
diff_crude <- round(diff(Mortalitydata$CRUDE, differences = 3) * 10^6, 0)
diff_graduated <- round(diff(Mortalitydata$GRADUATED, differences = 3) * 10^6, 0)

# A third difference needs four consecutive values, so there are three fewer
# differences than ages. Pair each one with the first age it spans; binding
# the full AGE column would recycle the shorter vectors and mislabel rows.
cbind(AGE = head(Mortalitydata$AGE, -3), diff_crude, diff_graduated)
#Q4
##Expected Data
# Expected deaths = graduated rate * ETR, kept to 2 decimal places.
Mortalitydata[["EXPECTED"]] <- with(
  Mortalitydata,
  round(GRADUATED * ETR, digits = 2)
)
head(Mortalitydata)

##Zx Value
# Standardised deviation: (actual - expected) / sqrt(expected).
Mortalitydata[["ZX"]] <- with(
  Mortalitydata,
  (DEATHS - EXPECTED) / sqrt(EXPECTED)
)
head(Mortalitydata)

#chi-square goodness-of-fit test
# chisq.test() on a two-column data frame treats it as a contingency table and
# tests independence of rows and columns, which is not a test of how well the
# graduated rates fit. Compute the goodness-of-fit statistic directly instead:
# the sum over ages of (actual - expected)^2 / expected.
chisq_stat <- sum(
  (Mortalitydata$DEATHS - Mortalitydata$EXPECTED)^2 / Mortalitydata$EXPECTED
)
# One degree of freedom per age, less the two fitted Gompertz parameters (B, C).
chisq_df <- nrow(Mortalitydata) - 2
# Upper-tail probability: large statistics are evidence against the fit.
chisq_p <- pchisq(chisq_stat, df = chisq_df, lower.tail = FALSE)
c(statistic = chisq_stat, df = chisq_df, p.value = chisq_p)
#a p-value below 0.05 rejects the hypothesis that the graduated rates fit the
#data; a p-value above 0.05 gives no evidence against the fit

#Q5
#A
# Frequency table of the ZX values over bins of width 4 from -20 to 20.
Std_test <- table(
  cut(
    Mortalitydata$ZX,
    breaks = seq(from = -20, to = 20, by = 4)
  )
)
Std_test

#B SIGNS TEST
library(plyr)
# Tabulate how many standardised deviations are positive vs. not.
pos_neg <- count(Mortalitydata$ZX > 0)
pos_neg
# Take the counts from the data instead of hard-coding them, so the test stays
# correct if the input table changes.
n_positive <- sum(Mortalitydata$ZX > 0, na.rm = TRUE)
n_total <- sum(!is.na(Mortalitydata$ZX))
# NOTE(review): alternative = "less" is kept from the original; a signs test is
# usually two-sided -- confirm that the one-sided form is intended.
binom.test(n_positive, n_total, alternative = "less")
#no grouping

#C cumulative deviation test
# Statistic: (total actual deaths - total expected) / sqrt(total expected).
cl_dv_test <- with(
  Mortalitydata,
  (sum(DEATHS) - sum(EXPECTED)) / sqrt(sum(EXPECTED))
)
cl_dv_test
# Lower-tail standard normal probability of the statistic.
pnorm(q = cl_dv_test)
#D SERIAL CORRELATION TEST
# Lag-1 serial correlation of the standardised deviations. The series length
# is taken from the data instead of being hard-coded as 51.
n_zx <- length(Mortalitydata$ZX)
z1 <- head(Mortalitydata$ZX, -1)  # all values except the last
z2 <- tail(Mortalitydata$ZX, -1)  # all values except the first
correlationn <- cor(z1, z2)
correlationn
# Scale the correlation by sqrt(n) to form the test statistic.
test_statistic <- sqrt(n_zx) * correlationn
test_statistic